We will demonstrate several things which can be done with Jupyter Notebooks.
First we load standard libraries for analyzing and plotting data.
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
DEMO 1: City of Chicago Budget
We will make a pie chart of the City of Chicago budget. First let’s display the PDF with the budget details.
class PDF(object):
    """Display a PDF inline in a notebook (HTML) or in a LaTeX export.

    Parameters
    ----------
    pdf : str
        Path or URL of the PDF file.
    size : tuple of (int, int)
        (width, height) used for the HTML iframe.
    """

    def __init__(self, pdf, size=(300, 300)):
        self.pdf = pdf
        self.size = size

    def _repr_html_(self):
        """Return an <iframe> element that embeds the PDF."""
        from html import escape  # local import keeps this cell self-contained

        # Quote and escape the attribute values: an unquoted src is cut off
        # at the first space in the file name.
        return '<iframe src="{0}" width="{1[0]}" height="{1[1]}"></iframe>'.format(
            escape(str(self.pdf), quote=True), self.size)

    def _repr_latex_(self):
        """Return the \\includegraphics command used when exporting to LaTeX."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)


PDF('ChicagoBudget.pdf', size=(700, 400))
Next let’s read an Excel sheet with the summary of the budget.
# Reading .xlsx files needs the optional 'openpyxl' package. Without it pandas
# raises "ImportError: Missing optional dependency 'openpyxl'"; install it
# with pip or conda before running this cell.
budget=pd.read_excel('ChicagoBudget.xlsx')
budget
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
File //anaconda3/lib/python3.10/site-packages/pandas/compat/_optional.py:142, in import_optional_dependency(name, extra, errors, min_version)
141 try:
--> 142 module = importlib.import_module(name)
143 except ImportError:
File //anaconda3/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1004, in _find_and_load_unlocked(name, import_)
ModuleNotFoundError: No module named 'openpyxl'
During handling of the above exception, another exception occurred:
ImportError Traceback (most recent call last)
Cell In[3], line 1
----> 1 budget=pd.read_excel('ChicagoBudget.xlsx')
2 budget
File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_base.py:478, in read_excel(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend)
476 if not isinstance(io, ExcelFile):
477 should_close = True
--> 478 io = ExcelFile(io, storage_options=storage_options, engine=engine)
479 elif engine and engine != io.engine:
480 raise ValueError(
481 "Engine should not be specified when passing "
482 "an ExcelFile - ExcelFile already has the engine set"
483 )
File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_base.py:1513, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
1510 self.engine = engine
1511 self.storage_options = storage_options
-> 1513 self._reader = self._engines[engine](self._io, storage_options=storage_options)
File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py:548, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
533 @doc(storage_options=_shared_docs["storage_options"])
534 def __init__(
535 self,
536 filepath_or_buffer: FilePath | ReadBuffer[bytes],
537 storage_options: StorageOptions = None,
538 ) -> None:
539 """
540 Reader using openpyxl engine.
541
(...)
546 {storage_options}
547 """
--> 548 import_optional_dependency("openpyxl")
549 super().__init__(filepath_or_buffer, storage_options=storage_options)
File //anaconda3/lib/python3.10/site-packages/pandas/compat/_optional.py:145, in import_optional_dependency(name, extra, errors, min_version)
143 except ImportError:
144 if errors == "raise":
--> 145 raise ImportError(msg)
146 return None
148 # Handle submodules: if we have submodule, grab parent module from sys.modules
ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
A pie chart will show us the proportions.
# figsize is (width, height) in inches; use e.g. (5, 5) for a bigger chart
fig, ax = plt.subplots(figsize=(3, 3))
budget_items = budget["EXPENSE"]  # categories
budget_amounts = budget["2023 BUDGET"]  # amounts
total = sum(budget_amounts)
# Draw on the Axes created above and keep `ax` bound to it (plt.pie returns a
# tuple of artists, not an Axes). The small font is passed through textprops
# so it applies to this chart only; assigning plt.rcParams['font.size'] would
# also shrink the text of every figure drawn afterwards.
ax.pie(
    budget_amounts,
    labels=budget_items,
    autopct=lambda p: '${:.0f}'.format(p * total / 100),  # dollar amount instead of percent
    textprops={'fontsize': 3},
)
ax.set_title('Budget Breakdown $' + str(total) + ' (13+ billion) in Expenses', size=5)  # add a title
fig.savefig('Budget.png')  # save the pie chart to the file Budget.png
Demo 2 Pixel Images
We can increase the resolution of images by increasing the number of pixels.
# PACKAGE: DO NOT EDIT THIS CELL
%matplotlib inline
from ipywidgets import interact
import cv2, os
!pip install opencv-python
Requirement already satisfied: opencv-python in c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages (4.6.0.66)
Requirement already satisfied: numpy>=1.13.3 in c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages (from opencv-python) (1.19.3)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
def makepixelimage(folder, N):
    """Show the first image in *folder* as an N x N grayscale pixel image.

    Parameters
    ----------
    folder : str
        Directory containing the image file(s).
    N : int
        Width and height, in pixels, of the resized image.

    Returns
    -------
    The object returned by ``plt.imshow`` for the resized image.

    Raises
    ------
    FileNotFoundError
        If *folder* contains no file that OpenCV can read as an image.
    """
    dsize = (N, N)  # target pixel image size
    # os.listdir returns names in arbitrary order; sort them so that the
    # "first" image is the same on every run and on every machine.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        pixels = cv2.imread(path, 0)  # flag 0: interpret the image as grayscale
        if pixels is None:
            # cv2.imread reports an unreadable / non-image file (for example
            # .DS_Store or Thumbs.db) by returning None instead of raising.
            continue
        # Only one image is displayed, so stop at the first readable one.
        return plt.imshow(cv2.resize(pixels, dsize), cmap="gray")
    raise FileNotFoundError("no readable image file found in {!r}".format(folder))
makepixelimage("images", 8) #8x8 image
<matplotlib.image.AxesImage at 0x246a88c7fd0>
makepixelimage("images", 16) #16x16 image
<matplotlib.image.AxesImage at 0x246a893b710>
makepixelimage("images", 32) #32x32 image
<matplotlib.image.AxesImage at 0x246a89a31d0>
DEMO 3: Track NFL Player Positions
We will plot the movement of two players step by step in a given play. Dataset: NFL_play.xlsx
Import special libraries.
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
Read the Player Tracking Data
# Load the player tracking data from the Excel workbook
# (pandas needs the optional 'openpyxl' package to read .xlsx files).
track_play=pd.read_excel('NFL_play.xlsx')
# Show the first 22 rows -- presumably one row per player on the field for the
# first recorded step; confirm against the data.
track_play.head(22)
| game_play | game_key | play_id | nfl_player_id | datetime | step | team | position | jersey_number | x_position | y_position | speed | distance | direction | orientation | acceleration | sa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58580_001136 | 58580 | 1136 | 44830 | 2021-10-10T21:08:20.900Z | -108 | away | CB | 22 | 61.59 | 42.60 | 1.11 | 0.11 | 320.33 | 263.93 | 0.71 | -0.64 |
| 1 | 58580_001136 | 58580 | 1136 | 42355 | 2021-10-10T21:08:20.900Z | -108 | away | NT | 75 | 59.63 | 24.33 | 0.10 | 0.01 | 7.98 | 227.03 | 0.41 | 0.27 |
| 2 | 58580_001136 | 58580 | 1136 | 43330 | 2021-10-10T21:08:20.900Z | -108 | away | ILB | 55 | 60.67 | 30.89 | 3.19 | 0.32 | 334.89 | 303.31 | 1.95 | -1.73 |
| 3 | 58580_001136 | 58580 | 1136 | 52425 | 2021-10-10T21:08:20.900Z | -108 | home | WR | 88 | 56.59 | 42.86 | 0.13 | 0.01 | 158.78 | 98.31 | 0.32 | 0.02 |
| 4 | 58580_001136 | 58580 | 1136 | 43293 | 2021-10-10T21:08:20.900Z | -108 | home | RB | 21 | 51.11 | 26.42 | 0.14 | 0.01 | 144.58 | 78.52 | 0.52 | 0.51 |
| 5 | 58580_001136 | 58580 | 1136 | 40031 | 2021-10-10T21:08:20.900Z | -108 | away | FS | 23 | 70.53 | 22.03 | 0.32 | 0.03 | 285.68 | 287.44 | 0.28 | 0.27 |
| 6 | 58580_001136 | 58580 | 1136 | 41242 | 2021-10-10T21:08:20.900Z | -108 | home | G | 70 | 57.33 | 24.80 | 0.03 | 0.01 | 328.04 | 57.38 | 0.07 | 0.07 |
| 7 | 58580_001136 | 58580 | 1136 | 52938 | 2021-10-10T21:08:20.900Z | -108 | home | T | 78 | 57.27 | 23.47 | 0.19 | 0.02 | 356.50 | 87.29 | 0.10 | -0.10 |
| 8 | 58580_001136 | 58580 | 1136 | 42347 | 2021-10-10T21:08:20.900Z | -108 | home | WR | 19 | 56.23 | 10.68 | 0.07 | 0.01 | 132.91 | 123.39 | 0.19 | -0.14 |
| 9 | 58580_001136 | 58580 | 1136 | 46135 | 2021-10-10T21:08:20.900Z | -108 | away | OLB | 59 | 59.90 | 21.14 | 1.58 | 0.16 | 218.10 | 278.39 | 0.48 | 0.22 |
| 10 | 58580_001136 | 58580 | 1136 | 43424 | 2021-10-10T21:08:20.900Z | -108 | home | QB | 4 | 57.31 | 26.27 | 0.07 | 0.01 | 207.25 | 93.25 | 0.11 | 0.07 |
| 11 | 58580_001136 | 58580 | 1136 | 43351 | 2021-10-10T21:08:20.900Z | -108 | away | CB | 24 | 64.39 | 10.89 | 0.80 | 0.08 | 130.49 | 309.04 | 0.44 | -0.44 |
| 12 | 58580_001136 | 58580 | 1136 | 45532 | 2021-10-10T21:08:20.900Z | -108 | home | TE | 89 | 57.16 | 30.99 | 0.07 | 0.01 | 39.61 | 100.73 | 0.51 | -0.14 |
| 13 | 58580_001136 | 58580 | 1136 | 46119 | 2021-10-10T21:08:20.900Z | -108 | home | G | 52 | 57.36 | 28.02 | 0.08 | 0.01 | 127.21 | 241.42 | 0.05 | 0.04 |
| 14 | 58580_001136 | 58580 | 1136 | 37082 | 2021-10-10T21:08:20.900Z | -108 | home | T | 77 | 57.16 | 29.54 | 0.12 | 0.01 | 145.27 | 85.16 | 0.07 | 0.01 |
| 15 | 58580_001136 | 58580 | 1136 | 53876 | 2021-10-10T21:08:20.900Z | -108 | away | DE | 91 | 59.65 | 22.71 | 0.23 | 0.02 | 268.69 | 271.27 | 0.23 | -0.23 |
| 16 | 58580_001136 | 58580 | 1136 | 53479 | 2021-10-10T21:08:20.900Z | -108 | away | OLB | 51 | 59.47 | 29.52 | 0.24 | 0.02 | 296.78 | 257.94 | 1.12 | 0.26 |
| 17 | 58580_001136 | 58580 | 1136 | 52663 | 2021-10-10T21:08:20.900Z | -108 | away | ILB | 48 | 63.25 | 27.50 | 0.51 | 0.05 | 183.62 | 253.71 | 0.31 | 0.31 |
| 18 | 58580_001136 | 58580 | 1136 | 46206 | 2021-10-10T21:08:20.900Z | -108 | home | TE | 86 | 57.37 | 22.12 | 0.37 | 0.04 | 127.85 | 63.63 | 0.69 | 0.62 |
| 19 | 58580_001136 | 58580 | 1136 | 52444 | 2021-10-10T21:08:20.900Z | -108 | away | FS | 29 | 72.19 | 31.46 | 0.61 | 0.06 | 11.77 | 247.69 | 0.63 | -0.33 |
| 20 | 58580_001136 | 58580 | 1136 | 47800 | 2021-10-10T21:08:20.900Z | -108 | away | DE | 97 | 59.48 | 26.81 | 0.23 | 0.01 | 346.84 | 247.16 | 1.29 | 0.90 |
| 21 | 58580_001136 | 58580 | 1136 | 52554 | 2021-10-10T21:08:20.900Z | -108 | home | C | 63 | 58.18 | 26.52 | 0.16 | 0.02 | 357.62 | 102.55 | 0.60 | 0.58 |
Plot the positions of the players at step -108 (before the snap) of play 1136.
# Keep only the rows recorded at step -108
temp = track_play[track_play["step"] == -108]

# Bounding box of the player positions, used for the axis limits
xmin, xmax = temp["x_position"].min(), temp["x_position"].max()
ymin, ymax = temp["y_position"].min(), temp["y_position"].max()

fig = plt.figure(figsize=(8, 4))
plt.xlim(xmin - 1, xmax + 1)
plt.ylim(ymin - 1, ymax + 1)

# Label every player with position + player id:
# home team in blue, the other team in red
for i in temp.index:
    x, y = temp.loc[i, "x_position"], temp.loc[i, "y_position"]
    label = temp.loc[i, "position"] + str(temp.loc[i, "nfl_player_id"])
    color = 'b' if temp.loc[i, "team"] == 'home' else 'r'
    plt.text(x, y, label, color=color, size=5)

plt.title("Play 1136 Step -108", size=10)
plt.show()
Let’s define a function which gives the position of players at any step of any play.
def teampositions(data, play, step):
    """Plot where every player is at one step of one play.

    Each player is drawn as a text label made of position and player id,
    blue for the home team and red for the other team.

    Parameters
    ----------
    data : pandas.DataFrame
        Tracking data with the columns play_id, step, team, position,
        nfl_player_id, x_position and y_position.
    play : int
        Value of play_id to select.
    step : int
        Value of step to select.

    Raises
    ------
    ValueError
        If *data* has no rows for the requested play and step.
    """
    stepdf = data[(data["play_id"] == play) & (data["step"] == step)]
    if stepdf.empty:
        # Without this check min()/max() below are NaN and the failure shows
        # up in the axis-limit call without naming the play or the step.
        raise ValueError(
            "no tracking rows for play {} at step {}".format(play, step))

    plt.figure(figsize=(8, 4))
    # Axis limits: bounding box of the players plus a margin of 1 on each side
    plt.xlim(stepdf["x_position"].min() - 1, stepdf["x_position"].max() + 1)
    plt.ylim(stepdf["y_position"].min() - 1, stepdf["y_position"].max() + 1)

    columns = ("x_position", "y_position", "position", "nfl_player_id", "team")
    for x, y, position, player_id, team in zip(*(stepdf[c] for c in columns)):
        color = 'b' if team == 'home' else 'r'
        plt.text(x, y, position + str(player_id), color=color, size=5)

    # Spaces added so the title reads "Play 1136 Step 10", matching the
    # stand-alone plot's "Play 1136 Step -108".
    plt.title("Play " + str(play) + " Step " + str(step), size=10)
    plt.show()
teampositions(track_play,1136,10)
Let’s define a function to animate the movement of two designated players in a specified play.
def movement(data, play, player1, player2):
    """Animate the paths of two players through one play.

    player1 is drawn with blue crosses and player2 with red dots; every
    frame extends both paths by one more recorded position.

    Parameters
    ----------
    data : pandas.DataFrame
        Tracking data with the columns play_id, step, nfl_player_id,
        x_position and y_position.
    play : int
        Value of play_id to select.
    player1, player2 : int
        Values of nfl_player_id for the two players to follow.

    Returns
    -------
    The ``FuncAnimation`` object. Keep a reference to it (for example by
    leaving it as the cell's result) so the animation is not discarded.

    Raises
    ------
    ValueError
        If either player has no rows in the requested play.
    """
    playdf = data[data["play_id"] == play]

    # Positions of each player ordered by step
    paths = []
    for player in (player1, player2):
        playerdf = playdf[playdf["nfl_player_id"] == player].sort_values(by='step')
        if playerdf.empty:
            raise ValueError(
                "player {} has no tracking rows in play {}".format(player, play))
        paths.append((playerdf["x_position"].to_numpy(),
                      playerdf["y_position"].to_numpy()))
    (x1, y1), (x2, y2) = paths

    # Frame count comes from the rows actually available for both players
    # rather than assuming exactly 22 rows per step.
    n_frames = min(len(x1), len(x2))

    fig, ax = plt.subplots(figsize=(5, 3))
    # Axis limits cover every position in the play, plus a margin of 1
    ax.set_xlim(playdf["x_position"].min() - 1, playdf["x_position"].max() + 1)
    ax.set_ylim(playdf["y_position"].min() - 1, playdf["y_position"].max() + 1)

    # One artist per player, created once and updated in place, instead of
    # adding two new artists to the axes on every frame.
    bluex, = ax.plot([], [], 'bx', ms=2, alpha=1)
    redo, = ax.plot([], [], 'ro', ms=2, alpha=1)

    def init():  # starting position of both players
        bluex.set_data(x1[:1], y1[:1])
        redo.set_data(x2[:1], y2[:1])
        return bluex, redo

    def animate(i):  # all positions up to and including index i
        bluex.set_data(x1[:i + 1], y1[:i + 1])
        redo.set_data(x2[:i + 1], y2[:i + 1])
        return bluex, redo

    # create animation using the animate() function; index 0 is drawn by init()
    ani = animation.FuncAnimation(fig, animate, frames=range(1, n_frames),
                                  init_func=init, interval=5, blit=True,
                                  repeat=False)
    plt.show()
    return ani
Let’s check the wide receiver (42347) and cornerback (43351), shown at the bottom of the chart in Step 4.
%matplotlib notebook
movement(track_play,1136,42347,43351)
<matplotlib.animation.FuncAnimation at 0x246a8d8ee48>
Traceback (most recent call last):
File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\cbook\__init__.py", line 388, in process
proxy(*args, **kwargs)
File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\cbook\__init__.py", line 228, in __call__
return mtd(*args, **kwargs)
File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\animation.py", line 1560, in _stop
self.event_source.remove_callback(self._loop_delay)
AttributeError: 'NoneType' object has no attribute 'remove_callback'
Demo 4 Word Clouds
Let's make a word cloud Christmas card using the song "Twelve Days of Christmas."
import wordcloud
#Define a function which counts the interesting words
def calculate_frequencies(textfile):
    """Build a word-cloud image from the interesting words of a text.

    Parameters
    ----------
    textfile : str or iterable of str
        The text itself, or its lines (for example the list returned by
        ``file.readlines()``).

    Returns
    -------
    The array returned by ``WordCloud.to_array()`` for a cloud of at most
    15 words generated from the word counts.

    Raises
    ------
    ValueError
        If no interesting word is left after filtering.
    """
    import collections

    #list of punctuations (raw string: the backslash is one of the characters)
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    #list of uninteresting words
    uninteresting_words = {"AND", "BY", "IT", "THE", "THAT", "A", "IS", "HAD",
                           "TO", "NOT", "BUT", "FOR", "OF", "WHICH", "IF",
                           "IN", "ON", "WERE", "YE", "THOU"}

    # Join the lines into one string. Calling str() on a list of lines gives
    # the list's repr, which glues brackets, quotes and a literal "\n" onto
    # the neighbouring words.
    text = textfile if isinstance(textfile, str) else " ".join(textfile)

    # Replace every punctuation character by a space so the word next to it
    # is kept ("tree," -> "TREE", "a-laying" -> "A LAYING") rather than dropped.
    text = text.upper().translate(
        str.maketrans(punctuations, " " * len(punctuations)))

    # Compare whole words: a substring test would throw away every word that
    # merely contains "A", "IT", "IN", ...
    counts = collections.Counter(
        word for word in text.split() if word not in uninteresting_words)
    if not counts:
        raise ValueError("no interesting words found in the text")

    #wordcloud
    cloud = wordcloud.WordCloud(max_words=15)  #can adjust the number of words
    cloud.generate_from_frequencies(counts)
    return cloud.to_array()
%matplotlib notebook
#Open the text file with the words to be plotted.
with open('twelvedays.txt','r') as file:
    carol = file.readlines()  # the lyrics as a list of lines
#make the wordcloud
carol = calculate_frequencies(carol)  # carol is rebound to the value returned by calculate_frequencies
plt.imshow(carol, interpolation = 'nearest')
plt.text(-5,70,"Merry Christmas!",color='r',size=40) # greeting in red (TASK 2: "Christmas!" added after "Merry")
plt.axis('off')  # hide the axes
plt.savefig('card.png', bbox_inches='tight')  # save the card to card.png
Demo 5 Name that Tune
Musical sound waves are created by rapid vibrations caused by musical instruments.
from IPython.display import YouTubeVideo
YouTubeVideo('tVYQRC1-D54')
Sound waves are represented mathematically by sine waves with different frequencies.
def sinewave(frequency):
    """Plot a sine wave with *frequency* cycles per second over one second."""
    import matplotlib.pyplot as plt

    # ---- build the wave ----
    samples_per_second = 44100  # number of measurements taken each second
    seconds = np.linspace(0, 1, samples_per_second)  # sample times from 0 to 1
    wave = np.sin(frequency * 2 * np.pi * seconds)  # sine wave definition

    # ---- draw it ----
    plt.figure(figsize=(2, 1))
    plt.plot(seconds, wave)
    plt.xlabel("seconds")
    return None
sinewave(1) #frequency=1 and 1 cycle per second
sinewave(2) #frequency=2 and 2 cycles per second
sinewave(20) #frequency=20 and 20 cycles per second
A computer can create a musical tone based on a given frequency.
def play(freq, duration=2.0):
    """Return an auto-playing pure tone.

    NOTE: a later cell defines ``play(song)``, which replaces this function
    once that cell has been run.

    Parameters
    ----------
    freq : float
        Frequency of the tone in hertz.
    duration : float, optional
        Length of the tone in seconds (default 2).

    Returns
    -------
    IPython.display.Audio
        Audio widget that starts playing when displayed.
    """
    import numpy as np
    from IPython.display import Audio  # library used to create sounds

    sampling_rate = 44100  # <- rate of sampling (samples per second)
    # Sample times must be exactly 1/sampling_rate apart. Spreading 44100
    # samples over 2 seconds (np.linspace(0, 2, sampling_rate)) and playing
    # them at 44100 samples per second doubles the pitch (220 sounds as 440)
    # and gives 1 second of sound.
    t = np.arange(int(round(duration * sampling_rate))) / sampling_rate
    sound_wave = np.sin(2 * np.pi * freq * t)  # <- sine function formula
    return Audio(sound_wave, rate=sampling_rate, autoplay=True)  # play the generated sound
play(220) # play a sound at 220 hz
A musical scale is a sequence of frequencies.
from IPython.display import Audio
# Note frequencies in hertz, written as ratios of the 220 Hz base note "do".
rest = 0  # frequency 0: the sine wave stays at zero

# first octave
do = 220
re, mi, fa = 9/8*220, 5/4*220, 4/3*220
so, la, ti = 3/2*220, 5/3*220, 15/8*220

# second octave: every ratio doubled
do1 = 2*220
re1, mi1, fa1 = 2*9/8*220, 2*5/4*220, 2*4/3*220
so1, la1, ti1 = 2*3/2*220, 2*5/3*220, 2*15/8*220

# first note of the third octave
do2 = 2*2*220

# one full octave, from do up to the next do
scale = [do, re, mi, fa, so, la, ti, do1]
def play(song):
    """Return an auto-playing Audio object for a sequence of note frequencies.

    Every entry of *song* is held for half a second; an entry of 0 produces
    a flat (zero) signal for its half second.
    """
    notes = np.array(song)
    framerate = 44100

    # Time axis covering len(song) / 2 seconds
    total_seconds = len(notes) / 2
    n_samples = round(framerate * len(notes) / 2)
    t = np.linspace(0, total_seconds, n_samples)[:-1]

    # Index of the note that is sounding at each sample (two notes per second)
    note_at_sample = np.floor(t * 2).astype(int)

    data = np.sin(2 * np.pi * notes[note_at_sample] * t)
    return Audio(data, rate=framerate, autoplay=True)
play(scale)
Can you name that tune?
tune= [so, so , la, la, so, fa,mi,rest,so, so , la, la, so, fa,mi,rest,so,so,la,ti,do1,do1,re1,re1,ti,la,ti,la,so,so,la,ti,do1,do1,ti,la,so,so,rest,rest,la,la,so,fa,mi,mi,rest,rest,so,so,do,fa,mi,mi,re,re,do,do,do,do,rest,rest]
play(tune)
from IPython.display import YouTubeVideo
YouTubeVideo('L4PA-MFSM34')